---
title: "Adults_Pilot_2016_Analysis"
author: "Anonymized For Peer-Review"
date: "31/03/2023"
output:
  html_document: default
  pdf_document: default
---

```{r setup, include = F}
NAME <- '3_Statistical analysis' ## Name of the R file goes here
PROJECT <- 'Megagrant_2016_Adults_Print_N170_Open_Materials'
PROJECT_DIR <- '~/Downloads/' ## Change this to the directory in which your project folder is located
knitr::opts_knit$set(root.dir = (file.path(PROJECT_DIR, PROJECT)))

library(MASS)
library(dplyr)
library(data.table)
library(openxlsx)
library("ggplot2")
library("ggpubr")
library(car)
library("robustlmm")
library(lmerTest)
library(lme4)
library(lmtest)
library(lattice)
library(effects)
library(emmeans)
library(sjPlot)
library(glmmTMB)
library(ggstatsplot)
library(psych)
library(compareGroups)
library(tab)
library(purrr)
library(Rmisc)

Sys.setlocale(,"ru_RU")
options(scipen = 999) #disable scientific notation (to prevent exponential notation)
```

```{r dplyr select problem, include=F}
dplyr.select <- dplyr::select
```

```{r include=F}
#organize files
if (dir.exists(file.path('2_pipeline'))){
  pipeline <- file.path('2_pipeline', NAME)
} else {
  pipeline <- file.path('2_pipeline', NAME)
}

if (!dir.exists(pipeline)) {
  dir.create(pipeline)
  for (folder in c('out', 'store', 'tmp')){
    dir.create(file.path(pipeline, folder))
  }
}
```


# Import 

## eps data for n170 
```{r include=F}
eps_n170 <- readRDS(file.path('2_pipeline', '2_Calculating_epsilon_means', 'out', 'N170_dataset'))%>%
  filter(Pooling%in%c("L-P", "R-P"))
eps_n170$ID <- as.factor(eps_n170$ID)
ids <- c(levels(eps_n170$ID))
```

## eps data for p100
```{r include=F}
eps_p100 <- readRDS(file.path('2_pipeline', '2_Calculating_epsilon_means', 'out', 'P100_dataset'))%>%
  filter(Pooling%in%c("L-P", "R-P"))
eps_p100$ID <- as.factor(eps_p100$ID)
```

## all erp data 
```{r include=F}
allprint_long <- read.csv("~/Downloads/Megagrant_2016_Adults_Print_N170_Open_Materials/2_pipeline/1_process data/out/Adults_Print_Pilot_alltogether_long.csv", header= TRUE)
allprint_long$X <- NULL
str(allprint_long)
allprint_long$ID <- as.factor(allprint_long$ID)
allprint_long$Gender <- as.factor(allprint_long$Gender)
levels(allprint_long$Pooling)[levels(allprint_long$Pooling)=="M-F"] <- "M-A"
```

##  CFIT data 
```{r include=F}
cfit<- read.csv("~/Downloads/Megagrant_2016_Adults_Print_N170_Open_Materials/2_pipeline/1_process data/out/CFIT_without_Outliers.csv", header= TRUE, sep = ";")

cfit <- cfit %>%  dplyr::select(CFIT_SS, ID) 
cfit$ID <- as.factor(cfit$ID)
```

## ARFA data 
```{r include=F}
arfa <- readRDS(file.path('2_pipeline', '1_process data', 'out', 'ARFA tidy'))
```

## Reaction time data 
```{r include=F}
rt_raw <- read.csv("~/Downloads/Megagrant_2016_Adults_Print_N170_Open_Materials/0_data/eeg/rt/Adults_Print_Merged_EPrime output_RT.csv", header = T)%>% filter(Condition != "")

rt_raw$Subject <-  as.factor(rt_raw$Subject)
rt <-  rt_raw%>%
  filter(Subject %in% ids)

rt$Subject <- factor(rt$Subject) # drop unused levels 
levels(eps_n170$ID)[!(levels(eps_n170$ID) %in% levels(rt$Subject))] # no record for person 781626, 51 participant in total for RT and accuracy analyses 
```  

## Demographic data
```{r include=F}
#dem <- readRDS(file.path('2_pipeline', '1_process data', 'out', 'Dem part tidy'))
dem <- read.xlsx("/Users/tatiana/Downloads/Megagrant_2016_Adults_Print_N170_Open_Materials/2_pipeline/1_process data/out/AdultStudy_Part_Demographics_2016_WithoutOutliers.xlsx", sheet = 1, na.strings = "NA", detectDates = T)

dem$Education_scored <- as.numeric(dem$Education_scored)
dem$Group <- as.factor(dem$Group)
dem$ID<- as.factor(dem$ID)
```

# Descriptives 

## merge
```{r echo=F}
descriptive <- inner_join(arfa, cfit,by=c("ID"="ID"))
descriptive <- inner_join(descriptive, dem,by=c("ID"="ID"))
```

## scale arfa scores
```{r}
cols_scaled <-  c("SenRep", "Analogies", "PWR", "Spelling", "PA", "SenCom", "Wdef") # Analogies = Synonyms subtest

descriptive_rescaled <- descriptive %>% 
  dplyr::select(., ID, Group, Age, Gender, History_of_institutionalization_years, Age_of_placement, everything())      
                              
descriptive_rescaled[cols_scaled] <- lapply(descriptive_rescaled[cols_scaled], rescale, df=FALSE) #scale arfa scores to IQ (average across two groups: mean 100 sd 15) 
```

## describe
```{r include=F}
ds <- describeBy(descriptive_rescaled, descriptive_rescaled$Group, mat = TRUE, digits = 2)
ds
```
### table with ARFA & CFIT scores
```{r}
ds2 <- reshape(dplyr::select(ds, -median, -item, -trimmed, -mad, -range, -se), v.names = c("n", "mean","sd", "min", "max", "skew", "kurtosis"), timevar = "group1", idvar = "vars", direction = "wide")

ds2$vars <- c("ID", "Group", "Age", "Gender", "Institutionalization years", "Age of placement", "Sentence Repetition", "Synonyms", "Pseudoword Repetition", "Spelling", "Phonological Awareness", "Sentence Comprehension", "Word Definition", "CFIT II", "Education", "Income", "Wellbeing", "N of books")

descriptives <- ds2[7:14, ] # descriptives for ARFA & CFIT 
```

### supplemental table with demographics
```{r}
ds3 <- compareGroups(Group ~ . - ID  -SenRep -Analogies - PWR - Spelling - PA - SenCom - Wdef -CFIT_SS, data = descriptive_rescaled, method = c(Gender = 3, Age = 1, Education_scored =3, Income_scored = 3,Wellbeing_scored = 3, N_books_scored =3, History_of_institutionalization_years = 1, Age_of_placement = 1), chisq.test.perm = FALSE, alpha = 0.01, include.label = FALSE) #the glm warning is due to History of inst variable; use chisq.test.perm = T if want to run a chisq test instead of Fisher's for categorical variables, here we use Fisher's, as in a lot of cells the numbers are <5; the warning about chisq if because compareGoups tries to perform a Chi-squared test. But when any cell has expected count smaller than 5 it performs a Fisher exact test

descriptives_suppl <- createTable(ds3)
descriptives_suppl
#summary(ds3)
ds3 # NB, compareGroups does not consider NA as a category, and we have missing values in some variables 

#Use tab package instead for freq tables 
Education <- factor(descriptive_rescaled$Education_scored, exclude=NULL)
Income <- factor(descriptive_rescaled$Income_scored, exclude=NULL)
Wellbeing <- factor(descriptive_rescaled$Wellbeing_scored, exclude=NULL)
Number_of_Books <- factor(descriptive_rescaled$N_books_scored, exclude=NULL)

tabfreq(x = descriptive_rescaled$Group, y = Education, test="fisher", kable=F)
tabfreq(x = descriptive_rescaled$Group, y = Income, test="fisher", kable=F)
tabfreq(x = descriptive_rescaled$Group, y = Wellbeing, test="fisher", kable=F)
tabfreq(x = descriptive_rescaled$Group, y = Number_of_Books,test="fisher",kable=F)
```
# RT & Accuracy analysis 

## Merge

## Prepare dataset
```{r}
rt <- inner_join(select(rt,-Group), dem, by=c("Subject" = "ID"))

rt$Condition <-  factor(rt$Condition)
levels(rt$Condition)[levels(rt$Condition)=="AO"] <- "SS"
levels(rt$Condition)[levels(rt$Condition)=="IP"] <- "NW"
levels(rt$Condition)[levels(rt$Condition)=="LP"] <- "PW"

rt$Condition <- factor(rt$Condition, levels = c("HF","LF", "NW","PW", "SS")) # reorder levels of factor

rt_tidy <- rt%>%
  filter(response.RT >199 & response.RT<3001) # filter responses as in ERP preprocessing 
rt_tidy$logRT <- log(rt_tidy$response.RT)

rt_tidy_summary <- rt_tidy %>%
  group_by(Subject, Condition, Group) %>%
  dplyr::summarise(mean_RT = mean(response.RT), n.trials = n())

rt_tidy_correct_sum <- rt_tidy %>%
  filter(response.ACC == 1) %>%
  group_by(Subject, Condition, Group) %>%
  dplyr::summarise(mean_RT = mean(response.RT), n.trials = n())

rt_tidy_incorrect_sum <- rt_tidy %>%
  filter(response.ACC == 0) %>%
  group_by(Subject, Condition, Group) %>%
  dplyr::summarise(mean_RT = mean(response.RT), n.trials = n())

rt_tidy_acc <- rt_tidy %>%
  group_by(Subject, Condition, Group) %>%
  dplyr::summarise(mean_acc = mean(response.ACC), mean_acc_percent = mean(response.ACC)*100)
```

## Check distribution of RT
```{r}
ggplot(rt_tidy, aes(logRT, fill= Group))+
  geom_density(alpha=0.5)

ggplot(rt_tidy, aes(response.RT, fill= Group))+
  geom_density(alpha=0.5)
```

## Plot reaction time by group

```{r}
labels <- c("HighFreq", "LowFreq", "Nonwords", "Pseudowords", "Symbols")
```

### Accuracy
```{r fig.height=5, fig.width=5}
# accuracy
plot.Accuracy <- ggplot(rt_tidy_acc, aes(x = Group, y = mean_acc_percent, color = Group, group = Group))+
  geom_violin()+
  stat_summary(fun.data = mean_cl_boot, geom = 'errorbar', width = 0.1, position = position_dodge(0.2))+
  stat_summary(fun.data = mean_cl_boot, geom = 'point', size = 3, position = position_dodge(0.2))+
  labs(x ="Experimental Conditions", y = "Accuracy: percentage")+
  scale_color_manual(values = c("#FF3366", "#66CCCC"), name="Group",labels=c("BFC", "IC"))+
  theme_bw()+
  facet_wrap(~Condition)
plot.Accuracy
```

### RT overall
```{r include=F}
plot.RT_overall <- ggplot(rt_tidy_summary, aes(x = Condition, y = mean_RT, color = Group, group = Group))+
  stat_summary(fun.data = mean_cl_boot, geom = 'errorbar', width = 0.1, position = position_dodge(0.2))+
  stat_summary(fun.data = mean_cl_boot, geom = 'point', size = 3, position = position_dodge(0.2))+
  stat_summary(fun.data = mean_cl_boot, geom =  'line', position = position_dodge(0.2))+
  labs(x ="Experimental Conditions", y = "Reaction time filtered 200-3000 ms")+
  scale_color_manual(values = c("#FF3366", "#66CCCC"), name="Group",labels=c("BFC", "IC"))+
  scale_x_discrete(labels=labels)+
  theme_bw()
plot.RT_overall
```

### RT correct only
```{r include=F, fig.height=5, fig.width=5}
#  bars
plot.RT_correct <- ggplot(rt_tidy_correct_sum, aes(x = Condition, y = mean_RT, color = Group, group = Group))+
  stat_summary(fun.data = mean_cl_boot, geom = 'errorbar', width = 0.1, position = position_dodge(0.2))+
  stat_summary(fun.data = mean_cl_boot, geom = 'point', size = 3, position = position_dodge(0.2))+
  stat_summary(fun.data = mean_cl_boot, geom =  'line', position = position_dodge(0.2))+
  labs(x ="Experimental Conditions", y = "Reaction time filtered 200-3000 ms. Correct trials")+
  scale_color_manual(values = c("#FF3366", "#66CCCC"), name="Group",labels=c("BFC", "IC"))+
  scale_x_discrete(labels= labels)+
  theme_bw()
plot.RT_correct
```

```{r fig.height=5, fig.width=5}
#  violins
plot.RT_correct.violin <- ggplot(rt_tidy_correct_sum, aes(x = Group, y = mean_RT, color = Group, group = Group))+
  geom_violin()+
  stat_summary(fun.data = mean_cl_boot, geom = 'errorbar', width = 0.1, position = position_dodge(0.2))+
  stat_summary(fun.data = mean_cl_boot, geom = 'point', size = 3, position = position_dodge(0.2))+
  labs(x ="Experimental Conditions", y = "Reaction time filtered 200-3000 ms. Correct trials")+
  scale_color_manual(values = c("#FF3366", "#66CCCC"), name="Group",labels=c("BFC", "IC"))+
  theme_bw()+
  facet_wrap(~Condition)
plot.RT_correct.violin
```


### RT incorrect only
```{r include =F}
plot.RT_incorrect <- ggplot(rt_tidy_incorrect_sum, aes(x = Condition, y = mean_RT, color = Group, group = Group))+
  stat_summary(fun.data = mean_cl_boot, geom = 'errorbar', width = 0.1, position = position_dodge(0.2))+
  stat_summary(fun.data = mean_cl_boot, geom = 'point', size = 3, position = position_dodge(0.2))+
  stat_summary(fun.data = mean_cl_boot, geom =  'line', position = position_dodge(0.2))+
  labs(x ="Experimental Conditions", y = "Reaction time filtered 200-3000 ms. Incorrect probes")+
  scale_color_manual(values = c("#FF3366", "#66CCCC"), name="Group",labels=c("BFC", "IC"))+
  scale_x_discrete(labels=labels)+
  theme_bw()
plot.RT_incorrect
```

## LMM 
### Accuracy 

```{r}
freqtable <- xtabs(~Condition+Group+response.ACC, data=rt_tidy)
freqtable #just check there is enough info for each category - ok
```
#### glmer - logistic
```{r}
# in the following models, the default treatment coding was used, but we do not need to interpret the coefficients

rt_tidy$response.ACC <-  as.factor (rt_tidy$response.ACC)
rt_tidy$Subject <-  as.factor (rt_tidy$Subject)

acc_empty <- glmer(response.ACC ~  (1|Subject) + (1 | TrialN) , family = "binomial", data = rt_tidy)
summary(acc_empty)
# icc(acc_empty) #0.442


acc1 <- glmer(response.ACC ~ Group*Condition + (1|Subject) + (1 | TrialN), family = "binomial",  data = rt_tidy) # convergence failure
#summary(acc1)  

#acc2 <- update(acc1,control=glmerControl(optCtrl=list(maxfun=2e5))) #try different max number of iterations -> same
#summary(acc2)

acc3 <- update(acc1,control=glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=2e5))) #try different max number of iterations AND another optimizer 

summary(acc3, corr =F) # beta represents the change in the logit of the outcome variable associated with a one-unit change in the predictor variable, the logit of the outcome is ln of the odds of Y occurring

plot(allEffects(acc3), multiline=TRUE, ci.style="bars")

(4951.5 - 4837.2)/4951.5 #R2 analogue in logistic regressions: calculated by dividing the model chi-square, which represents the change from the baseline (based on the log-likelihood) by the baseline ???2LL (the deviance of the model before any predictors were entered) -->  2% of variance is explained by our predictors of Group & Condition, a lot is explained solely by individual Subject and Stimuli variations, AIC and BIC are lower in the complete model

anova(acc_empty, acc3)
```

##### CIs using se
```{r}
se <- sqrt(diag(vcov(acc3)))

# table of estimates with 95% CI
(tab <- cbind(Estimate = fixef(acc3), LowerLimits = fixef(acc3) - 1.96 * se, UpperLimits = fixef(acc3) + 1.96 *
    se))

# odds-ratios instead of logits
exp(tab) # This works like this: probability (p - table below) of the event occurring. Then we calculate odds for the reference point: say, odds for BF to get score 1 in the condition LF is 0.8338592/(1-0.8338592) = 5.0189911208. Then we calculate what happens when there is a unit change in the predictor variable (BF -> IC): odds for IC to get score 1 in the condition LF is 0.7368419/(1-0.7368419) = 2.799997036. Then we calculate change in the odds 2.799997036/5.0189911208
```

##### probabilities
```{r}
Effect(c("Group", "Condition"), acc3)
```

#### sum coding: confirmatory
```{r acc sum coding}
# Sum-to-zero coding: confirmatory
rt_sum_acc<- rt_tidy


contrasts(rt_sum_acc$Condition) = contr.sum(5)
contrasts(rt_sum_acc$Group) = contr.sum(2)

acc4 <- glmer(response.ACC ~ Group*Condition + (1|Subject) + (1|TrialN), family = "binomial",rt_sum_acc, na.action=na.omit, control=glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=2e5))) #number of iterations and optimizer as in model acc3

summary(acc4, corr =T) # beta represents the change in the logit of the outcome variable associated with a one-unit change in the predictor variable, the logit of the outcome is ln of the odds of Y occurring
##Interestingly, the variance in random effect larger for trials than for subjects for accuracy, for rt vice versa 
### Cond1 <-  HF, Cond 2 <- LF, etc; SS are not demonstrated in the summary output because could be calculated as a linear combination from other coefficients, in sum coding you can choose which level to "omit" 
#### Group coef <-  difference between Grand Mean and each level of Group, Condition <-  difference between Grand Mean and each level of Condition, Group:Condition <-  difference from the Group coef difference for each level of Cond - we do not use that (although, we can see, that for HF probably there are no group differences, for LF it is smaller than Mean difference, and for PW and NW it is larger that Mean difference )
```


##### estimates and CIs using se
```{r acc CIs}
se2 <- sqrt(diag(vcov(acc4)))

# table of estimates with 95% CI
round((tab2 <- cbind(Estimate = fixef(acc4), LowerLimits = fixef(acc4) - 1.96 * se2, UpperLimits = fixef(acc4) + 1.96 *
    se2)), digits =2)

# odds-ratios instead of logits
round(exp(tab2), digits = 2) # This works like this: probability (p - see table above) of the event occurring. Then we calculate odds for the reference point: say, odds for BF to get score 1 in the condition LF is 0.8338592/(1-0.8338592) = 5.0189911208. Then we calculate what happens when there is a unit change in the predictor variable (BF -> IC): odds for IC to get score 1 in the condition LF is 0.7368419/(1-0.7368419) = 2.799997036. Then we calculate change in the odds 2.799997036/5.0189911208

tab_model(acc4) #nicely looking summary of lmer model
```

##### pairwise contrasts
```{r acc pairwise contrasts}
# Between-group comparisons within Condition using emmeans 
contrast(emmeans(acc4, ~ Group|Condition), "pairwise", adjust="none") #group differences in all Conditions, except HF 

#contrast(emmeans(acc4, ~ Condition|Group), "pairwise", adjust="none") # between-Condition comparisons within each Group; this is exploratory: we wanted to check what drives the condition differences from the grand mean in the main model. For the BF no conditions except for LF are different from HF (the easiest); for IC all conditions differ from HF (they all are harder)
```

#### deviation coding: exploratory
```{r acc deviation coding, include = F}
# Deviation coding: exploratory; Alternative name - simple effect coding
rt_dev_acc<- rt_tidy

rt_dev_acc <- rt_dev_acc %>%
  mutate(Gr = if_else(Group == "BF", -1/2, 1/2)) # target A2

rt_dev_acc <- rt_dev_acc %>%
  mutate(LowFreq = if_else(Condition == "LF", 4/5, -1/5), # target A2
         Nonwords = if_else(Condition == "NW", 4/5, -1/5), # target A3
         Pseudowords = if_else(Condition == "PW", 4/5, -1/5), # target A4
         Symbols = if_else(Condition == "SS", 4/5, -1/5)) # target A5


acc5 <- glmer(response.ACC ~ Gr*LowFreq*Nonwords*Pseudowords*Symbols + (1|Subject) + (1|TrialN), family = "binomial",rt_dev_acc, na.action=na.omit, control=glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=2e5)))

summary(acc5, corr =F)   #model is rank deficient, meaning our hypotheses  are  collinear, that is, that one of the contrasts can be generated from the other contrasts by linear combination (our Group & Condition contrast) 
```


### RT
#### lmer
```{r}
# model_rt <- lmer(logRT ~ Group*Condition + (1|Subject), rt_tidy, na.action=na.omit,REML = FALSE) #non-robust linear mixed models with random effects for subjects only and whole both correct and incorrect answers
# summary(model_rt)

model_rt_empty <- lmer(logRT ~ (1|Subject) + (1|TrialN), rt_tidy%>%filter(response.ACC == 1), na.action=na.omit,REML = FALSE) #non-robust linear mixed models: random intercept for each subject and stimulus but no interaction for these random effects; correct answers only
#icc(model_rt_empty) #0.354

model_rt2 <- lmer(logRT ~ Group*Condition + (1|Subject) + (1|TrialN), rt_tidy%>%filter(response.ACC == 1), na.action=na.omit,REML = FALSE) #non-robust linear mixed models: random intercept for each subject and stimulus but no interaction for these random effects; correct answers only
summary(model_rt2)
```

#### sum coding: confirmatory 
```{r rt sum coding}
# Sum-to-zero coding: confirmatory
rt_sum<- rt_tidy%>%filter(response.ACC == 1)

contrasts(rt_sum$Condition) = contr.sum(5)
contrasts(rt_sum$Group) = contr.sum(2)


model_rt3 <- lmer(logRT ~ Group*Condition + (1|Subject) + (1|TrialN), rt_sum, na.action=na.omit,REML = FALSE) #non-robust linear mixed models: random intercept for each subject and stimulus but no interaction for these random effects; correct answers only

summary(model_rt3) # the interpretation of coefficients <-  see sum-coded model for accuracy
```

##### estimates and CIs
```{r rt estimates}
Effect(c("Group", "Condition"), model_rt3)

tab_model(model_rt3) #nicely looking summary of lmer model

plot(allEffects(model_rt3), multiline=TRUE, ci.style="bars") #plot marginal means with CIs 
```

##### pairwise contrasts
```{r rt pairwise contrasts}
contrast(emmeans(model_rt3, ~ Group|Condition), "pairwise", adjust="none") #group differences in all Conditions, except SS 

#ls_means(
#model_rt3,
#which = NULL,
#level = 0.95,
#ddf = c("Satterthwaite", "Kenward-Roger"), pairwise = T) #ls means approximate degrees of freedom with Satterthwaite, the coefficients, SEs and p's are close to emmeans

```

#### deviation coding: exploratory
```{r rt deviation coding, include = F}
# Deviation coding: exploratory; Alternative name - simple effect coding

rt_dev<- rt_tidy%>%filter(response.ACC == 1)

rt_dev <- rt_dev %>%
  mutate(Gr = if_else(Group == "BF", -1/2, 1/2)) # target A2

rt_dev <- rt_dev %>%
  mutate(LowFreq = if_else(Condition == "LF", 4/5, -1/5), # target A2
         Nonwords = if_else(Condition == "NW", 4/5, -1/5), # target A3
         Pseudowords = if_else(Condition == "PW", 4/5, -1/5), # target A4
         Symbols = if_else(Condition == "SS", 4/5, -1/5)) # target A5

model_rt4 <- lmer(logRT ~ Gr*LowFreq*Nonwords*Pseudowords*Symbols + (1|Subject) + (1|TrialN), rt_dev, na.action=na.omit,REML = FALSE) #non-robust linear mixed models: random intercept for each subject and stimulus but no interaction for these random effects; correct answers only

summary(model_rt4) #model is rank deficient, meaning our hypotheses  are  collinear, that is, that one of the contrasts can be generated from the other contrasts by linear combination (our Group & Condition contrast)
```


# N170 analysis 

## Merge
```{r echo=F}
eps_n170 <- inner_join(eps_n170, cfit,by=c("ID"="ID"))

eps_n170$ID <-  as.factor(eps_n170$ID)
eps_n170$Pooling <- factor(eps_n170$Pooling) #drop unused levels 
head(eps_n170)
```

## Center the variables
```{r}
# centering with 'scale()'
center_scale <- function(x) {
  scale(x, center = TRUE, scale = FALSE)
}

# mean-centering of continuous variables
eps_n170$CFIT_centered <- center_scale(eps_n170$CFIT_SS)
eps_n170$Age_centered <- center_scale(eps_n170$Age)

eps_n170$CFIT_centered <-as.vector(t(eps_n170$CFIT_centered)) # you should consider another function
eps_n170$Age_centered <- as.vector(t(eps_n170$Age_centered))
```

## Check distribution
```{r}
ggplot(eps_n170, aes(N170.mean, fill= Group))+
  geom_density(alpha=0.5)+
  facet_wrap(~Pooling)
```

## LMM 

### empty model
```{r}
empty_n170 <- lmer(N170.mean ~  (1|ID), eps_n170, na.action=na.omit, REML = FALSE)

summary(empty_n170)

4.327/(4.327 + 3.792) # ICC 0.5329474 = 53 % of total variability exists at the individual level

#empty_n170.2 <- lmer(N170.mean ~  (1 + Type|ID), eps_n170, na.action=na.omit,REML = FALSE)

#summary(empty_n170.2)
```

### rlmer
```{r}
rmodel_n170_1 <- rlmer(N170.mean ~  Group*Pooling*Type +(1|ID), eps_n170, na.action=na.omit,REML = FALSE, rho.sigma.e = psi2propII(smoothPsi, k = 2.28), rho.sigma.b = psi2propII(smoothPsi, k = 2.28)) #rlmer 1 (Sergey's code) # you can add Pooling as random slope in the random effects 
summary(rmodel_n170_1) # symbols differ from other stimuli types, and its amp is different for lp and rp; no group differences

rmodel_n170_2 <- rlmer(N170.mean ~ CFIT_centered+ Group*Pooling*Type +(1|ID), eps_n170, na.action=na.omit,REML = FALSE, rho.sigma.e = psi2propII(smoothPsi, k = 2.28), rho.sigma.b = psi2propII(smoothPsi, k = 2.28)) 

rmodel_n170_3 <-  rlmer(N170.mean ~ Age_centered + Group*Pooling*Type +(1|ID), eps_n170, na.action=na.omit,REML = FALSE, rho.sigma.e = psi2propII(smoothPsi, k = 2.28), rho.sigma.b = psi2propII(smoothPsi, k = 2.28))

rmodel_n170_4 <-  rlmer(N170.mean ~ Age_centered + CFIT_centered+ Group*Pooling*Type +(1|ID), eps_n170, na.action=na.omit,REML = FALSE, rho.sigma.e = psi2propII(smoothPsi, k = 2.28), rho.sigma.b = psi2propII(smoothPsi, k = 2.28))
```
#### model comparisons
```{r}
compare(rmodel_n170_1, rmodel_n170_2, rmodel_n170_3, rmodel_n170_4, show.rho.functions = F)
```

### lmer
```{r echo=F}
model_n170_1 <- lmer(N170.mean ~ Group*Type*Pooling + (1|ID), eps_n170, na.action=na.omit,REML = FALSE) #non-robust linear mixed models 
summary(model_n170_1)
anova(model_n170_1)

model_n170_2 <- lmer(N170.mean ~ CFIT_centered+Group*Pooling*Type + (1|ID), eps_n170, na.action=na.omit,REML = FALSE)

model_n170_3 <- lmer(N170.mean ~  Age_centered + CFIT_centered+Group*Pooling*Type + (1|ID), eps_n170, na.action=na.omit,REML = FALSE) 

model_n170_4 <- lmer(N170.mean ~ Gender + Age_centered + CFIT_centered+Group*Pooling*Type + (1|ID), eps_n170, na.action=na.omit,REML = FALSE)
```

#### model comparisons
```{r}
anova(model_n170_1, model_n170_2, model_n170_3, model_n170_4)
```
```{r}
anova(empty_n170, model_n170_1)
performance::icc(rmodel_n170_1)

model_n170_1.1 <- lmer(N170.mean ~ Group + (1|ID), eps_n170, na.action=na.omit,REML = FALSE)
model_n170_1.2 <- lmer(N170.mean ~ Group + Type + (1|ID), eps_n170, na.action=na.omit,REML = FALSE)
model_n170_1.3 <- lmer(N170.mean ~ Group + Type + Pooling + (1|ID), eps_n170, na.action=na.omit,REML = FALSE)


anova(empty_n170, model_n170_1.1,model_n170_1.2, model_n170_1.3, model_n170_1)


```
## Assumptions check
####lmer 
```{r}
# 1 linearity
plot.linearity<-plot(resid(model_n170_1), eps_n170$N170.mean) # the residuals vs observered

# 2 residuals should be approximately normally distributed
qqnorm(resid(model_n170_1))
qqmath(model_n170_1)

# 3 variance of the residuals should be equal across groups/subjects (homocedasticity)
plot(model_n170_1)

eps_n170$resid_lmer<- residuals(model_n170_1) #extracts the residuals and places them in a new column in our original data table
eps_n170$resid_lmer_abs <-abs(eps_n170$resid_lmer) #creates a new column with the absolute value of the residuals
eps_n170$resid_lmer2 <- eps_n170$resid_lmer_abs^2 #squares the absolute values of the residuals to provide the more robust estimate
levene.model <- lm(resid_lmer2 ~ ID, data=eps_n170) #ANOVA of the squared residuals
anova(levene.model) #displays the results

# collection of functions
plot_model(model_n170_1, type='diag') # you can adjust type (see package info: ?plot_model)
```

#### rlmer 
```{r}
plot(rmodel_n170_1) # based on lmer & rlmer diagnostic plots and betas we can conclude we need to use robust 
```
## Plot effects 
```{r fig.height=6, fig.width =6}
# plot

coef.plot.names <- c("IC Group : R-P Pooling : Symbols", "IC Group : R-P Pooling : Pseudowords", "IC Group : R-P Pooling : Nonwords", "IC Group : R-P Pooling : LowFreq words", "R-P Pooling : Symbols", "R-P Pooling : Pseudowords", "R-P Pooling : Nonwords", "R-P Pooling : LowFreq words", "IC Group : Symbols", "IC Group : Pseudowords", "IC Group : Nonwords", "IC Group : LowFreq words", "IC Group : R-P Pooling", "Symbols", "Pseudowords", "Nonwords", "LowFreq words", "R-P Pooling", "IC Group", "Intercept (HighFreq words)")
coef.plot.names <- rev(coef.plot.names)

n170_rlmer_plot <- ggstatsplot::ggcoefstats(
  x = rmodel_n170_1,
  conf.level = 0.95,
  exclude.intercept = FALSE,
    only.significant = FALSE,
  package = "ggsci",
  palette = "category20c_d3", 
  title="N170.Robust Estimation of Linear Mixed-Effects Model"
) +
  ggplot2::scale_y_discrete(labels = coef.plot.names) +
  ggplot2::labs(x = "Regression Coefficient: N170 amplitude", y = "Fixed Effects" )

n170_rlmer_plot
```
## Emmeans contrasts
```{r include=T}
Effect(c("Group", "Pooling", "Type"), rmodel_n170_1)


plot(effect("Group:Pooling:Type", rmodel_n170_1),
         axes=list(grid=TRUE)) # equivalent (plus grid)

emmeans(rmodel_n170_1, ~Type*Group|Pooling)
#contrast(emmeans(rmodel_n170_1, ~ Type|Group*Pooling), "pairwise", adjust="none") #try also without "pairwaise" & "consec"
#contrast(contrast(emmeans(rmodel_n170_1,~Group|Type*Pooling), "pairwise"), "pairwise", by=NULL)
#
#
n170emm <- contrast(emmeans(rmodel_n170_1, ~ Group|Type*Pooling), "pairwise", adjust="none")
summary(n170emm, infer=TRUE)


#eff_size(emm, sigma = sigma(rmodel_n170_1), edf = df.residual(rmodel_n170_1))


plot_model(model_n170_1, type = "pred", terms= c("Group", "Type", "Pooling"))

plot_model(rmodel_n170_1, type = "pred", terms= c("Type", "Group", "Pooling"))
```

## Different coding schemes

### sum coding: confirmatory 
```{r n170 sum coding, include=T}
# Sum-to-zero coding: confirmatory
#Investigate the main effect of the Group across all conditions and 2 clusters 

eps_n170_sum <- eps_n170 
contrasts(eps_n170_sum$Type) = contr.sum(5)
#contrasts(eps_n170_sum$Group) = contr.sum(2) # commented out, so for group factor we use default dummy coding, BF as a reference level 
contrasts(eps_n170_sum$Pooling) = contr.sum(2)


model_n170_sum <- lmer(N170.mean ~ Group*Type*Pooling + (1|ID), eps_n170_sum, na.action=na.omit,REML = FALSE) #non-robust linear mixed models 
summary(model_n170_sum)
anova(model_n170_sum)

rmodel_n170_sum <- rlmer(N170.mean ~  Group*Pooling*Type +(1|ID), eps_n170_sum, na.action=na.omit,REML = FALSE, rho.sigma.e = psi2propII(smoothPsi, k = 2.28), rho.sigma.b = psi2propII(smoothPsi, k = 2.28)) #rlmer,  you can add Pooling as random slope in the random effects 

summary(rmodel_n170_sum)
tab_model(rmodel_n170_sum, show.stat = T) #nicely formatted table, how p-values are computed by default with Kenward-Rogers approximation, but we do not use it 
```

### custom coding 1: confirmatory
```{r n170 custom coding 1}
# Custom coding 1: confirmatory
# The difference between processing of linguistic (average of HF, LF, PW, NW) and non-linguistic stimuli (SS) in the LP cluster in both groups; 

eps_n170_custom <- eps_n170 %>%
  filter(Pooling == "L-P") #LP only 
  
eps_n170_custom$factor <- with(eps_n170_custom, interaction(Group, Type), drop = TRUE)


HcSum <- rbind(
  cH00=c(BFHF=1/10, ICHF=1/10, BFLF=1/10,ICFLF=1/10, BFNW =1/10, ICNW =1/10, BFPW=1/10, ICPW=1/10, BFSS =1/10, ICSS =1/10),
  MainEffGr=c(BFHF=1/5, ICHF=-1/5, BFLF=1/5,ICLF=-1/5, BFNW =1/5, ICNW =-1/5, BFPW=1/5, ICPW=-1/5, BFSS =1/5, ICSS =-1/5),
  LvsS_BF=c(BFHF=1/4, ICHF=0, BFLF=1/4,ICLF=0, BFNW =1/4, ICNW =0, BFPW=1/4, ICPW=0, BFSS =-1, ICSS =0),
  LvsS_IC=c(BFHF=0, ICHF=1/4, BFLF=0,ICLF=1/4, BFNW =0, ICNW =1/4, BFPW=0, ICPW=1/4, BFSS =0, ICSS =-1),
  H5=c(BFHF=1, ICHF=-1, BFLF=0,ICLF=0, BFNW =0, ICNW =0, BFPW=0, ICPW=0, BFSS =0, ICSS =0), 
  H6=c(BFHF=0, ICHF=0, BFLF=1,ICLF=-1, BFNW =0, ICNW =0, BFPW=0, ICPW=0, BFSS =0, ICSS =0), 
  H7=c(BFHF=0, ICHF=0, BFLF=0,ICLF=0, BFNW =1, ICNW =-1, BFPW=0, ICPW=0, BFSS =0, ICSS =0), 
  H8=c(BFHF=0, ICHF=0, BFLF=0,ICLF=0, BFNW =0, ICNW =0, BFPW=0, ICPW=0, BFSS =0, ICSS =0), 
    H9=c(BFHF=0, ICHF=0, BFLF=0,ICLF=0, BFNW =0, ICNW =0, BFPW=0, ICPW=0, BFSS =0, ICSS =0), 
    H10=c(BFHF=0, ICHF=0, BFLF=0,ICLF=0, BFNW =0, ICNW =0, BFPW=0, ICPW=0, BFSS =0, ICSS =0))

###H5= HF_BFvsIC, H6 = LF_BFvsIC, H7 = NW_BFvsIC , H8-H10 are needed to remain this way

fractions(t(HcSum))

ginv2 <- function(x)
  fractions(provideDimnames(ginv(x), 
                            base=dimnames(x)[2:1]))
(XcSum <-  ginv2(HcSum))

contrasts(eps_n170_custom$factor) = XcSum
#contrasts(eps_n170_custom$Group) = contr.sum(2)


model_n170_custom <- rlmer(N170.mean ~ factor + (1|ID), eps_n170_custom, na.action=na.omit,REML = FALSE) #non-robust linear mixed models 

summary(model_n170_custom)
tab_model(model_n170_custom, show.stat = T) #nicely formatted table, how p-values are computed by default with Kenward-Rogers approximation, but we do not use it 
```
S_vs_Ling=c(HF=-1/4, LF=-1/4, NW=-1/4, PW =-1/4, SS =1), Wds_vs_Pw=c(HF=1/2, LF=1/2, NW=-1/2, PW =-1/2, SS =0), HF_vs_LF=c(HF=1, LF=-1, NW=0, PW =-0, SS =0), PW_vs_NW=c(HF=0, LF=0, NW=-1, PW =1, SS =0))

### custom coding 2: confirmatory
```{r n170 custom coding 2}
# Custom coding 2: confirmatory 
# The difference between processing of linguistic (average of HF, LF, PW, NW) and non-linguistic stimuli (SS) across two electrode clusters 
eps_n170_custom_2 <- eps_n170 

eps_n170_custom_2$factor <- with(eps_n170_custom_2, interaction(Group, Type, Pooling), drop = TRUE)

HcSum2 <- rbind(
  Ling_Pool_BF=c(BFHFLP=1/4, ICHFLP=0, BFLFLP=1/4,ICLFLP=0, BFNWLP =1/4, ICNWLP =0, BFPWLP=1/4, ICPWLP=0, BFSSLP =0, ICSSLP =0, BFHFRP=-1/4, ICHFRP=0, BFLFRP=-1/4,ICLFRP=0, BFNWRP =-1/4, ICNWRP =0, BFPWRP=-1/4, ICPWRP=0, BFSSRP =0, ICSSRP =0),
       Ling_Pool_IC=c(BFHFLP=0, ICHFLP=1/4, BFLFLP=0,ICLFLP=1/4, BFNWLP =0, ICNWLP =1/4, BFPWLP=0, ICPWLP=1/4, BFSSLP =0, ICSSLP =0, BFHFRP=0, ICHFRP=-1/4, BFLFRP=0,ICLFRP=-1/4, BFNWRP =0, ICNWRP =-1/4, BFPWRP=0, ICPWRP=-1/4, BFSSRP =0, ICSSRP =0),
  
      SS_Pool_BF=c(BFHFLP=0, ICHFLP=0, BFLFLP=0,ICLFLP=0, BFNWLP =0, ICNWLP =0, BFPWLP=0, ICPWLP=0, BFSSLP =1, ICSSLP =0, BFHFRP=0, ICHFRP=0, BFLFRP=0,ICLFRP=0, BFNWRP =0, ICNWRP =0, BFPWRP=0, ICPWRP=0, BFSSRP =-1, ICSSRP =0),

      SS_Pool_IC=c(BFHFLP=0, ICHFLP=0, BFLFLP=0,ICLFLP=0, BFNWLP =0, ICNWLP =0, BFPWLP=0, ICPWLP=0, BFSSLP =0, ICSSLP =1, BFHFRP=0, ICHFRP=0, BFLFRP=0,ICLFRP=0, BFNWRP =0, ICNWRP =0, BFPWRP=0, ICPWRP=0, BFSSRP =0, ICSSRP =-1))

#MainEffPool=c(BFHFLP=1/10, ICHFLP=1/10, BFLFLP=1/10,ICLFLP=1/10, BFNWLP =1/10, ICNWLP =1/10, BFPWLP=1/10, ICPWLP=1/10, BFSSLP =1/10, ICSSLP =1/10, BFHFRP=-1/10, ICHFRP=-1/10, BFLFRP=-1/10,ICLFRP=-1/10, BFNWRP =-1/10, ICNWRP =-1/10, BFPWRP=-1/10, ICPWRP=-1/10, BFSSRP =-1/10, ICSSRP =-1/10)
  
fractions(t(HcSum2))

ginv2 <- function(x)
  fractions(provideDimnames(ginv(x), 
                            base=dimnames(x)[2:1]))
(XcSum2 <-  ginv2(HcSum2))

contrasts(eps_n170_custom_2$factor) = XcSum2
#contrasts(eps_n170_custom$Group) = contr.sum(2)


model_n170_custom_2 <- rlmer(N170.mean ~ factor + (1|ID), eps_n170_custom_2, na.action=na.omit,REML = FALSE) #non-robust linear mixed models 
summary(model_n170_custom_2)

tab_model(model_n170_custom_2, show.stat = T) #nicely formatted table, how p-values are computed by default with Kenward-Rogers approximation, but we do not use it 
```

### deviation coding: exploratory 
```{r deviation coding, include = F}
# Deviation coding: exploratory
eps_n170_dev <- eps_n170 %>%
  mutate(Gr = if_else(Group == "BF", -1/2, 1/2)) # target A2

eps_n170_dev <- eps_n170_dev %>%
  mutate(Plg = if_else(Pooling == "L-P", -1/2, 1/2))

eps_n170_dev <- eps_n170_dev %>%
  mutate(LowFreq = if_else(Type == "Lowfreq", 4/5, -1/5), # target A2
         Nonwords = if_else(Type == "Non-words", 4/5, -1/5), # target A3
         Pseudowords = if_else(Type == "Pseudowords", 4/5, -1/5), # target A4
         Symbols = if_else(Type == "Symbols", 4/5, -1/5)) # target A5

rmodel_n170_dev <- rlmer(N170.mean ~ Gr*Plg*LowFreq*Nonwords*Pseudowords*Symbols + (1|ID), eps_n170_dev, na.action=na.omit,REML = FALSE, rho.sigma.e = psi2propII(smoothPsi, k = 2.28), rho.sigma.b = psi2propII(smoothPsi, k = 2.28)) #non-robust linear mixed models 
summary(rmodel_n170_dev)

```

### poly coding: exploratory
```{r poly coding}
# Poly coding 
eps_n170_poly <- eps_n170 %>%
  mutate(Gr = if_else(Group == "BF", -1/2, 1/2))%>%filter(Pooling == "L-P") # target A2

#eps_n170_poly <- eps_n170_poly %>%
  #mutate(Plg = if_else(Pooling == "L-P", -1/2, 1/2))
  
#levels(eps_n170_poly$Type) <- c("HighFreq","Pseudowords","Lowfreq", "Non-words","Symbols")
#levels(eps_n170_poly$Type)

contrasts(eps_n170_poly$Type) = contr.poly(5)

model_n170_poly <- lmer(N170.mean ~ Type*Gr + (1|ID), eps_n170_poly, na.action=na.omit,REML = FALSE) #non-robust linear mixed models 
summary(model_n170_poly)


dataX <-  model.matrix( ~ 1+Type, eps_n170_poly)
eps_n170_poly [, c("L", "Q", "C")] <- dataX [, 2:4]

model_n170_poly_2 <- lmer(N170.mean ~ (L + Q +C)*Gr+(1|ID), eps_n170_poly, na.action=na.omit,REML = FALSE)
summary(model_n170_poly_2)

aov <- anova(model_n170_poly_2)
aov

aov$`Sum Sq`/sum(aov$`Sum Sq`) #The results show that the expected linear trend explains 62% of the variance in condition means of factor Type and quadratic trend explains 34%
```


# P100 analysis 

## Merge
```{r echo=F}
eps_p100 <- inner_join(eps_p100, cfit,by=c("ID"="ID"))
eps_p100$Pooling <- factor(eps_p100$Pooling) #drop unused levels 
```

## Center the variables 
```{r}
# mean-centering of continuous variables
eps_p100$CFIT_centered <- center_scale(eps_p100$CFIT_SS)
eps_p100$Age_centered <- center_scale(eps_p100$Age)

eps_p100$CFIT_centered <-as.vector(t(eps_p100$CFIT_centered)) # you should consider another function
eps_p100$Age_centered <- as.vector(t(eps_p100$Age_centered))
```

```{r}
ggplot(eps_p100, aes(P100.mean, fill= Group))+
  geom_density(alpha=0.5)+
  facet_wrap(~Pooling)
```

## LMM
### rlmer
```{r}
empty_p100<- rlmer(P100.mean ~  (1|ID), eps_p100, na.action=na.omit,REML = FALSE, rho.sigma.e = psi2propII(smoothPsi, k = 2.28), rho.sigma.b = psi2propII(smoothPsi, k = 2.28)) #empty model for p100
summary(empty_p100)
2.265/(2.265+2.671) #ICC= 0.4588736


rmodel_p100 <- rlmer(P100.mean ~  Group*Pooling*Type +(1|ID), eps_p100, na.action=na.omit,REML = FALSE, rho.sigma.e = psi2propII(smoothPsi, k = 2.28), rho.sigma.b = psi2propII(smoothPsi, k = 2.28)) #rlmer 1 (Sergey's code)

summary(rmodel_p100) # difference between lp and rp; no group differences
```
### lmer
```{r}
model_p100 <- lmer(P100.mean ~ Group*Pooling*Type + (1|ID), eps_p100, na.action=na.omit,REML = FALSE) #non-robust linear mixed models 
summary(model_p100)
```

## Assumptions check
#### rlmer
```{r}
plot(rmodel_p100) # it is not that bad; coef from lmer and rlmer differ just a little bit, we can probably use whatever we want 
```

## Plot effects
```{r fig.height=6, fig.width =6}
p100_rlmer_plot <- ggstatsplot::ggcoefstats(
  x = rmodel_p100,
  conf.level = 0.95,
  exclude.intercept = FALSE,
    only.significant = FALSE,
  package = "ggsci",
  palette = "category20c_d3", 
  title="P100.Robust Estimation of Linear Mixed-Effects Model"
) +   ggplot2::scale_y_discrete(labels = coef.plot.names) +
  ggplot2::labs(x = "Regression Coefficient: P100 amplitude", y = "Fixed Effects" )

p100_rlmer_plot
```


## Emmeans contrast 
```{r}
Effect(c("Group", "Pooling", "Type"), rmodel_p100)

plot(effect("Group:Pooling:Type", rmodel_p100),
         axes=list(grid=TRUE)) # equivalent (plus grid)

emmeans(rmodel_p100, ~Type*Group|Pooling)

#contrast(emmeans(rmodel_p100, ~ Type|Group*Pooling), "pairwise", adjust="none") #try also without "pairwaise" & "consec"

#contrast(contrast(emmeans(rmodel_p100,~Group|Type*Pooling), "pairwise"), "pairwise", by=NULL)

p100emm <- contrast(emmeans(rmodel_p100, ~ Group|Type*Pooling), "pairwise", adjust="none")
summary(p100emm, infer=TRUE)

#eff_size(emm, sigma = sigma(rmodel_p100), edf = df.residual(rmodel_p100))


plot_model(rmodel_p100, type = "pred", terms= c("Group", "Type", "Pooling"))
plot_model(rmodel_p100, type = "pred", terms= c("Type", "Group", "Pooling"))
```

## Different coding scheme 
### sum coding: confirmatory 

```{r p100 sum coding}
# Sum-to-zero coding: confirmatory
#Investigate the main effect of the Group across all conditions and 2 clusters 

eps_p100_sum <- eps_p100 

contrasts(eps_p100_sum$Type) = contr.sum(5)
#contrasts(eps_p100_sum$Group) = contr.sum(2)
contrasts(eps_p100_sum$Pooling) = contr.sum(2)

model_p100_sum <- lmer(P100.mean ~ Group*Type*Pooling + (1|ID), eps_p100_sum, na.action=na.omit,REML = FALSE) #non-robust linear mixed models 
summary(model_p100_sum)

anova(model_p100_sum)

rmodel_p100_sum <- rlmer(P100.mean ~  Group*Pooling*Type +(1|ID), eps_p100_sum, na.action=na.omit,REML = FALSE, rho.sigma.e = psi2propII(smoothPsi, k = 2.28), rho.sigma.b = psi2propII(smoothPsi, k = 2.28)) #rlmer,  you can add Pooling as random slope in the random effects 

summary(rmodel_p100_sum) # no effect of the group but the effect of hemisphere is present 
tab_model(rmodel_p100_sum,  show.stat = T)
```

### custom coding 1: confirmatory
```{r custom coding p100}
# Custom coding: confirmatory 
# Custom coding -- (2) the difference between processing of linguistic (average of HF, LF, PW, NW) and non-linguistic stimuli (SS) in the LP cluster in both groups; 

eps_p100_custom <- eps_p100 %>%
  filter(Pooling == "L-P")
  
eps_p100_custom$factor <- with(eps_p100_custom, interaction(Group, Type), drop = TRUE)


HcSum <- rbind(
  cH00=c(BFHF=1/10, ICHF=1/10, BFLF=1/10,ICFLF=1/10, BFNW =1/10, ICNW =1/10, BFPW=1/10, ICPW=1/10, BFSS =1/10, ICSS =1/10),
  MainEffGr=c(BFHF=1/5, ICHF=-1/5, BFLF=1/5,ICLF=-1/5, BFNW =1/5, ICNW =-1/5, BFPW=1/5, ICPW=-1/5, BFSS =1/5, ICSS =-1/5),
  LvsS_BF=c(BFHF=1/4, ICHF=0, BFLF=1/4,ICLF=0, BFNW =1/4, ICNW =0, BFPW=1/4, ICPW=0, BFSS =-1, ICSS =0),
  LvsS_IC=c(BFHF=0, ICHF=1/4, BFLF=0,ICLF=1/4, BFNW =0, ICNW =1/4, BFPW=0, ICPW=1/4, BFSS =0, ICSS =-1),
  H5=c(BFHF=1, ICHF=-1, BFLF=0,ICLF=0, BFNW =0, ICNW =0, BFPW=0, ICPW=0, BFSS =0, ICSS =0), 
  H6=c(BFHF=0, ICHF=0, BFLF=1,ICLF=-1, BFNW =0, ICNW =0, BFPW=0, ICPW=0, BFSS =0, ICSS =0), 
  H7=c(BFHF=0, ICHF=0, BFLF=0,ICLF=0, BFNW =1, ICNW =-1, BFPW=0, ICPW=0, BFSS =0, ICSS =0), 
  H8=c(BFHF=0, ICHF=0, BFLF=0,ICLF=0, BFNW =0, ICNW =0, BFPW=0, ICPW=0, BFSS =0, ICSS =0), 
    H9=c(BFHF=0, ICHF=0, BFLF=0,ICLF=0, BFNW =0, ICNW =0, BFPW=0, ICPW=0, BFSS =0, ICSS =0), 
    H10=c(BFHF=0, ICHF=0, BFLF=0,ICLF=0, BFNW =0, ICNW =0, BFPW=0, ICPW=0, BFSS =0, ICSS =0))

fractions(t(HcSum))

ginv2 <- function(x)
  fractions(provideDimnames(ginv(x), 
                            base=dimnames(x)[2:1]))
(XcSum <-  ginv2(HcSum))

contrasts(eps_p100_custom$factor) = XcSum[,2:10]
#contrasts(eps_p100_custom$Group) = contr.sum(2)

model_p100_custom <- rlmer(P100.mean ~ factor + (1|ID), eps_p100_custom, na.action=na.omit,REML = FALSE) #non-robust linear mixed models 

summary(model_p100_custom) #factorSvsL_BF    -0.1748     0.1564  -1.118 factorSvsL_IC    -0.6798     0.1689  -4.024 нет контраста у bf, зато есть контраст у ic - почему? 

tab_model(model_p100_custom,  show.stat = T)
```

S_vs_Ling=c(HF=-1/4, LF=-1/4, NW=-1/4, PW =-1/4, SS =1), Wds_vs_Pw=c(HF=1/2, LF=1/2, NW=-1/2, PW =-1/2, SS =0), HF_vs_LF=c(HF=1, LF=-1, NW=0, PW =-0, SS =0), PW_vs_NW=c(HF=0, LF=0, NW=-1, PW =1, SS =0))


### custom coding 2: confirmatory
```{r custom coding 2 p100 }
# Custom coding 2: confirmatory 
# The difference between processing of linguistic (average of HF, LF, PW, NW) and non-linguistic stimuli (SS) across two electrode clusters 

eps_p100_custom_2 <- eps_p100 

eps_p100_custom_2$factor <- with(eps_p100_custom_2, interaction(Group, Type, Pooling), drop = TRUE)

HcSum2 <- rbind(
  Ling_Pool_BF=c(BFHFLP=1/4, ICHFLP=0, BFLFLP=1/4,ICLFLP=0, BFNWLP =1/4, ICNWLP =0, BFPWLP=1/4, ICPWLP=0, BFSSLP =0, ICSSLP =0, BFHFRP=-1/4, ICHFRP=0, BFLFRP=-1/4,ICLFRP=0, BFNWRP =-1/4, ICNWRP =0, BFPWRP=-1/4, ICPWRP=0, BFSSRP =0, ICSSRP =0),
       Ling_Pool_IC=c(BFHFLP=0, ICHFLP=1/4, BFLFLP=0,ICLFLP=1/4, BFNWLP =0, ICNWLP =1/4, BFPWLP=0, ICPWLP=1/4, BFSSLP =0, ICSSLP =0, BFHFRP=0, ICHFRP=-1/4, BFLFRP=0,ICLFRP=-1/4, BFNWRP =0, ICNWRP =-1/4, BFPWRP=0, ICPWRP=-1/4, BFSSRP =0, ICSSRP =0),
  
      SS_Pool_BF=c(BFHFLP=0, ICHFLP=0, BFLFLP=0,ICLFLP=0, BFNWLP =0, ICNWLP =0, BFPWLP=0, ICPWLP=0, BFSSLP =1, ICSSLP =0, BFHFRP=0, ICHFRP=0, BFLFRP=0,ICLFRP=0, BFNWRP =0, ICNWRP =0, BFPWRP=0, ICPWRP=0, BFSSRP =-1, ICSSRP =0),

      SS_Pool_IC=c(BFHFLP=0, ICHFLP=0, BFLFLP=0,ICLFLP=0, BFNWLP =0, ICNWLP =0, BFPWLP=0, ICPWLP=0, BFSSLP =0, ICSSLP =1, BFHFRP=0, ICHFRP=0, BFLFRP=0,ICLFRP=0, BFNWRP =0, ICNWRP =0, BFPWRP=0, ICPWRP=0, BFSSRP =0, ICSSRP =-1))

#MainEffPool=c(BFHFLP=-1/10, ICHFLP=-1/10, BFLFLP=-1/10,ICLFLP=-1/10, BFNWLP =-1/10, ICNWLP =-1/10, BFPWLP=-1/10, ICPWLP=-1/10, BFSSLP =-1/10, ICSSLP =-1/10, BFHFRP=1/10, ICHFRP=1/10, BFLFRP=1/10,ICLFRP=1/10, BFNWRP =1/10, ICNWRP =1/10, BFPWRP=1/10, ICPWRP=1/10, BFSSRP =1/10, ICSSRP =1/10)) #RP minus LP 

fractions(t(HcSum2))

ginv2 <- function(x)
  fractions(provideDimnames(ginv(x), 
                            base=dimnames(x)[2:1]))
(XcSum2 <-  ginv2(HcSum2))

contrasts(eps_p100_custom_2$factor) = XcSum2
#contrasts(eps_p100_custom$Group) = contr.sum(2)


model_p100_custom_2 <- rlmer(P100.mean ~ factor + (1|ID), eps_p100_custom_2, na.action=na.omit,REML = FALSE) #non-robust linear mixed models 
summary(model_p100_custom_2)
tab_model(model_p100_custom_2, show.stat = T)
```

# N400 analysis 

## Merge
```{r echo=F}
allprint_long <- inner_join(allprint_long, cfit,by=c("ID"="ID"))
```

## Center the variables 
```{r}
# mean-centering of continuous variables
allprint_long$CFIT_centered <- center_scale(allprint_long$CFIT_SS)
allprint_long$Age_centered <- center_scale(allprint_long$Age)

allprint_long$CFIT_centered <-as.vector(t(allprint_long$CFIT_centered)) # you should consider another function
allprint_long$Age_centered <- as.vector(t(allprint_long$Age_centered))
```

## Average amp: 300-500 ms 
```{r}
midline_300_500 <- allprint_long %>% 
  dplyr::select(Pooling, Type, ID, Group, Age_centered, CFIT_centered, Gender, variable, value, time) %>%
  filter(Pooling %in% c("M-C"), time >299 & time<501)

avg_300_500 <- group_by(midline_300_500, ID, Type, Group, Pooling, Age_centered, CFIT_centered, Gender) %>%
  summarize_each(funs(mean(., na.rm=TRUE)), value)
```

## Check distribution
```{r}
ggplot(avg_300_500, aes(value, fill= Group))+
  geom_density(alpha=0.5)+
  facet_wrap(~Pooling)
```

## LMM
### rlmer
```{r}
empty_n400<- rlmer(value ~  (1|ID), avg_300_500, na.action=na.omit, REML = FALSE, rho.sigma.e = psi2propII(smoothPsi, k = 2.28), rho.sigma.b = psi2propII(smoothPsi, k = 2.28)) #empty model for n400
summary(empty_n400)
1.0294/(1.0294+0.4938) #ICC= 0.6758141

rmodel_n400 <- rlmer(value ~  Group*Type +(1|ID), avg_300_500, na.action=na.omit,REML = FALSE, rho.sigma.e = psi2propII(smoothPsi, k = 2.28), rho.sigma.b = psi2propII(smoothPsi, k = 2.28)) #rlmer 1 (Sergey's code)
summary(rmodel_n400) #
```
## Assumptions check
#### rlmer
```{r}
plot(rmodel_n400) # it is not totally fine, we should stick to robust lmm 
```

## Plot effects
```{r fig.height=4, fig.width =4}
n400_rlmer_plot <- ggstatsplot::ggcoefstats(
  x = rmodel_n400,
  conf.level = 0.95,
  exclude.intercept = FALSE,
    only.significant = FALSE,
  package = "ggsci",
  palette = "category20c_d3", 
  title="N400. Robust Estimation of Linear Mixed-Effects Model"
) +
  ggplot2::labs(x = "Regression Coefficient: N400 amplitude", y = "Fixed Effects")

n400_rlmer_plot
```
## Emmeans contrasts
```{r}
Effect(c("Group", "Type"), rmodel_n400)

plot(effect("Group:Type", rmodel_n400),
         axes=list(grid=TRUE)) # equivalent (plus grid)

emmeans(rmodel_n400, ~Type*Group)

contrast(emmeans(rmodel_n400, ~ Type|Group), "pairwise", adjust="none") #try also without "pairwaise" & "consec"

#contrast(contrast(emmeans(rmodel_n400,~Group|Type), "pairwise"), "pairwise", by=NULL)

contrast(emmeans(rmodel_n400, ~ Group*Type), "pairwise", adjust="none")

#eff_size(emm, sigma = sigma(rmodel_n400), edf = df.residual(rmodel_n400))


plot_model(rmodel_n400, type = "pred", terms= c("Group", "Type"))
plot_model(rmodel_n400, type = "pred", terms= c("Type", "Group"))


```

## Different coding scheme 
### sum coding: confirmatory
```{r}
# Sum-to-zero coding -- Investigate the main effect of the Group across all conditions in one cluster

avg_300_500$Type <- factor(avg_300_500$Type)
avg_300_500$Group <- factor(avg_300_500$Group)

avg_300_500_sum <- avg_300_500 

contrasts(avg_300_500$Type) = contr.sum(5)
#contrasts(eps_p100_sum$Group) = contr.sum(2)


model_n400_sum <- lmer(value ~ Group*Type + (1|ID), avg_300_500, na.action=na.omit,REML = FALSE) #non-robust linear mixed models 
#summary(model_n400_sum)
#anova(model_n400_sum)

rmodel_n400_sum <- rlmer(value ~  Group*Type +(1|ID), avg_300_500, na.action=na.omit,REML = FALSE, rho.sigma.e = psi2propII(smoothPsi, k = 2.28), rho.sigma.b = psi2propII(smoothPsi, k = 2.28)) #rlmer,  you can add Pooling as random slope in the random effects 

summary(rmodel_n400_sum) # no effect of the group but the effect of hemisphere is present 
tab_model(rmodel_n400_sum, show.stat = T)
```

###custom coding 1: confirmatory
```{r}
# Custom coding -- the difference between processing of HF and LF, HF and NW in both groups; 

avg_300_500_custom <- avg_300_500 

  
avg_300_500_custom$factor <- with(avg_300_500_custom, interaction(Group, Type), drop = TRUE)


HcSum3 <- rbind(
  cH00=c(BFHF=1/10, ICHF=1/10, BFLF=1/10,ICFLF=1/10, BFNW =1/10, ICNW =1/10, BFPW=1/10, ICPW=1/10, BFSS =1/10, ICSS =1/10),
  MainEffGr=c(BFHF=1/5, ICHF=-1/5, BFLF=1/5,ICLF=-1/5, BFNW =1/5, ICNW =-1/5, BFPW=1/5, ICPW=-1/5, BFSS =1/5, ICSS =-1/5),
  HFvsLF_BF=c(BFHF=1, ICHF=0, BFLF=-1,ICLF=0, BFNW =0, ICNW =0, BFPW=0, ICPW=0, BFSS =0, ICSS =0),
  HFvsLF_IC=c(BFHF=0, ICHF=1, BFLF=0,ICLF=-1, BFNW =0, ICNW =0, BFPW=0, ICPW=0, BFSS =0, ICSS =0),
  HFvsNW_BF=c(BFHF=1, ICHF=0, BFLF=0,ICLF=0, BFNW =-1, ICNW =0, BFPW=0, ICPW=0, BFSS =0, ICSS =0), 
  HFvsNW_IC=c(BFHF=0, ICHF=1, BFLF=0,ICLF=0, BFNW =0, ICNW =-1, BFPW=0, ICPW=0, BFSS =0, ICSS =0), 
  H7=c(BFHF=0, ICHF=0, BFLF=0,ICLF=0, BFNW =0, ICNW =0, BFPW=0, ICPW=0, BFSS =0, ICSS =0), 
  H8=c(BFHF=0, ICHF=0, BFLF=0,ICLF=0, BFNW =0, ICNW =0, BFPW=0, ICPW=0, BFSS =0, ICSS =0), 
    H9=c(BFHF=0, ICHF=0, BFLF=0,ICLF=0, BFNW =0, ICNW =0, BFPW=0, ICPW=0, BFSS =0, ICSS =0), 
    H10=c(BFHF=0, ICHF=0, BFLF=0,ICLF=0, BFNW =0, ICNW =0, BFPW=0, ICPW=0, BFSS =0, ICSS =0))

fractions(t(HcSum3))

ginv2 <- function(x)
  fractions(provideDimnames(ginv(x), 
                            base=dimnames(x)[2:1]))
(XcSum3 <-  ginv2(HcSum3))

contrasts(avg_300_500_custom$factor) = XcSum3
#contrasts(avg_300_500_custom$Group) = contr.sum(2)


model_n400_custom <- rlmer(value ~ factor + (1|ID), avg_300_500_custom, na.action=na.omit,REML = FALSE) #non-robust linear mixed models 

summary(model_n400_custom) 
tab_model(model_n400_custom, show.stat = T)
```
S_vs_Ling=c(HF=-1/4, LF=-1/4, NW=-1/4, PW =-1/4, SS =1), Wds_vs_Pw=c(HF=1/2, LF=1/2, NW=-1/2, PW =-1/2, SS =0), HF_vs_LF=c(HF=1, LF=-1, NW=0, PW =-0, SS =0), PW_vs_NW=c(HF=0, LF=0, NW=-1, PW =1, SS =0))


# Behavioral & Neuro relation

## Merge ARFA & N400
```{r}
n400_arfa <- inner_join(avg_300_500, arfa, by=c("ID"="ID"))

## Center the variables 
# mean-centering of continuous variables
n400_arfa$Analogies_centered <- center_scale(n400_arfa$Analogies) #Analogies = Synonyms subtest
n400_arfa$Wdef_centered <- center_scale(n400_arfa$Wdef)

n400_arfa$Analogies_centered <-as.vector(t(n400_arfa$Analogies_centered))
n400_arfa$Wdef_centered <- as.vector(t(n400_arfa$Wdef_centered))

```

## Include only linguistic stimuli
```{r}
n400_arfa_ling <- filter(n400_arfa, Type!="Symbols")
```

## Explorative analysis

```{r}
# Is there any interaction between group & lexical skills in relation to N400 amplitude during reading 

rmodel_n400_arfa_1 <- rlmer(value ~  Group*Analogies_centered +(1|ID), n400_arfa_ling, na.action=na.omit,REML = FALSE, rho.sigma.e = psi2propII(smoothPsi, k = 2.28), rho.sigma.b = psi2propII(smoothPsi, k = 2.28)) # Synonyms/Analogies subtest: note, that Analogies is the original name, and this subtest mainly coded like that in the datasets
summary(rmodel_n400_arfa_1)
tab_model(rmodel_n400_arfa_1, show.stat = T)


rmodel_n400_arfa_2 <- rlmer(value ~  Group*Wdef_centered +(1|ID), n400_arfa_ling, na.action=na.omit,REML = FALSE, rho.sigma.e = psi2propII(smoothPsi, k = 2.28), rho.sigma.b = psi2propII(smoothPsi, k = 2.28)) # Word Definition subtest
summary(rmodel_n400_arfa_2) 
tab_model(rmodel_n400_arfa_2, show.stat = T)

plot(rmodel_n400_arfa_1) 
plot(rmodel_n400_arfa_2)
```

# Plots
## Basic averaged waveforms 

### Average the datasets
```{r}
average <- group_by(allprint_long, Type, Group, Pooling, time) %>%
  summarize_each(funs(mean(., na.rm=TRUE)), value)
```
### Plot the averaged waveforms for all Poolings
```{r}
grandav <- ggplot(average,aes(time, value))+
  scale_x_continuous(limits=c(-200,900), breaks=c(-200,0,200,400,600,800),expand=c(0,0)) +
  scale_color_brewer(palette = "Dark2") +
  scale_fill_brewer(palette = "Dark2") +
  labs(x="Time, ms", y = expression(paste("Amplitude (",mu,"V)")))+
  geom_vline(xintercept=0,size=0.25)+geom_hline(yintercept=0,size=0.25)+
  theme_minimal()+
  geom_line(aes(colour=Type))+
  facet_wrap(~Group+ Pooling)
grandav
```

## Plotting Waveforms and Difference waves with CIs and p-values

### datasets
```{r}
filtered_long <- allprint_long%>% dplyr::select(., Pooling, Type, ID, Group, value, time)%>%
  filter(., Pooling %in% c("L-P", "M-C", "R-P"))

filtered_long$Pooling <-  as.factor(filtered_long$Pooling)
filtered_long$Type <-  as.factor(filtered_long$Type)
filtered_long$Group <-  as.factor(filtered_long$Group)
```

### Calculate running within-subject CIs
```{r}
runningCIs <- filtered_long %>%
  group_by(Type, Pooling, time, Group, ID) %>%
  dplyr::summarise(value = mean(value)) %>%
  split(.$time) %>%
  map(~summarySEwithin(data = .,
                       measurevar = "value",
                       withinvars = c("Type", "Pooling"),
                       betweenvars = "Group",
                       idvar = "ID"))

wsCI <- map_df(runningCIs,magrittr::extract) %>%
  mutate(
    time = rep(unique(filtered_long$time),each = 30)) #Note, you'll have to change 15 to match the number of conditions (unique "time" across type*pooling)
```

### Basic plots
```{r}
erp.plot <- ggplot(filtered_long,aes(time, value))+
 scale_x_continuous(limits=c(-200,900), breaks=c(-200,0,100,200, 300, 400, 500, 600,700, 800),expand=c(0,0)) +
scale_color_brewer(palette = "Dark2") +
scale_fill_brewer(palette = "Dark2") +
theme_minimal()
```

### Waveforms Symbols vs Linguistic stimuli, with within-subject CI

#### LP
```{r}
# filter and consider diff linguistic type as one 
wsCI_lp <- wsCI %>%
    filter(Pooling == "L-P")

levels(wsCI_lp$Type)[levels(wsCI_lp$Type)==c("HighFreq", "Lowfreq", "Non-words", "Pseudowords")] <- "Linguistic stimuli"

##filter poolings and recode linguistic factors
filtered_long_lp<- filtered_long %>%
  filter(Pooling == "L-P")

  levels(filtered_long_lp$Type)[levels(filtered_long_lp$Type)==c("HighFreq", "Lowfreq", "Non-words", "Pseudowords")] <- "Linguistic stimuli"
```

```{r}
# plot
erp.plot.1 <- 
  ggplot(filtered_long_lp,aes(time, value))+
  scale_y_continuous(breaks = c(-5, -2.5, 0, 2.5, 5, 7.5))+
scale_x_continuous(limits=c(-200,855), breaks=c(0,100,200, 300, 400, 500, 600,700, 800),expand=c(0,0))+
  scale_color_brewer(palette = "Dark2") +
  scale_fill_brewer(palette = "Dark2") +
  theme_light()+
  geom_ribbon(data = wsCI_lp,
              aes(ymin = value-ci,
                  ymax = value+ci,
                  fill = Type,
                  colour = Type),
              linetype=0,
              alpha = 0.3) +
  stat_summary(fun = mean,
               geom = "line",
               size = 1,
               aes(colour = Type),
               alpha = 0.8)+
  labs(x = "Time (ms)",
       y = expression(paste("Amplitude (",mu,"V)")),
       colour = "")+
  geom_vline(xintercept = 0,linetype = "dashed" )+
  geom_hline(yintercept = 0,linetype = "dashed")+
  facet_wrap(~Pooling+Group, ncol=3)
erp.plot.1  
  
```

#### RP
```{r}
# filter and consider diff linguistic type as one 
wsCI_rp <- wsCI %>%
    filter(Pooling == "R-P")

levels(wsCI_rp$Type)[levels(wsCI_rp$Type)==c("HighFreq", "Lowfreq", "Non-words", "Pseudowords")] <- "Linguistic stimuli"

##filter poolings and recode linguistic factors
filtered_long_rp<- filtered_long %>%
  filter(Pooling == "R-P")

  levels(filtered_long_rp$Type)[levels(filtered_long_rp$Type)==c("HighFreq", "Lowfreq", "Non-words", "Pseudowords")] <- "Linguistic stimuli"

```

```{r}
# plot
erp.plot.1.rp <- 
  ggplot(filtered_long_rp,aes(time, value))+
  scale_y_continuous(breaks = c(-5, -2.5, 0, 2.5, 5, 7.5))+
  scale_x_continuous(limits=c(-200,855), breaks=c(0,100,200, 300, 400, 500, 600,700, 800),expand=c(0,0)) +
  scale_color_brewer(palette = "Dark2") +
  scale_fill_brewer(palette = "Dark2") +
  theme_light()+
  geom_ribbon(data = wsCI_rp,
              aes(ymin = value-ci,
                  ymax = value+ci,
                  fill = Type,
                  colour = Type),
              linetype=0,
              alpha = 0.3) +
  stat_summary(fun = mean,
               geom = "line",
               size = 1,
               aes(colour = Type),
               alpha = 0.8)+
  labs(x = "Time (ms)",
       y = expression(paste("Amplitude (",mu,"V)")),
       colour = "")+
  geom_vline(xintercept = 0,linetype = "dashed" )+
  geom_hline(yintercept = 0,linetype = "dashed")+
  facet_wrap(~Pooling+Group, ncol=3)
erp.plot.1.rp
```
## Arrange plots into figures
```{r}
figure_3 <- ggarrange(erp.plot.1, erp.plot.1.rp,
                    ncol = 1, nrow = 2, common.legend = TRUE, legend = "right")
figure_3
```


### Waveforms with within-subject CIs and means, different plots for different conditions
```{r}
# filter and consider diff linguistic type as one 
wsCI_lp_mp <- wsCI %>%
    filter(Pooling %in% c("L-P", "M-C"))

##filter poolings and recode linguistic factors
filtered_long_lp_mp<- filtered_long %>%
    filter(Pooling %in% c("L-P", "M-C"))

erp.plot.2 <- 
  ggplot(filtered_long_lp_mp,aes(time, value))+
  scale_x_continuous(limits=c(-200,900), breaks=c(0,200, 400, 600, 800),expand=c(0,0)) +
  scale_color_manual(values = c("#FF3366", "#66CCCC"),name="",labels=c("")) +
  scale_fill_manual(values = c("#FF3366", "#66CCCC"),name="Group",labels=c("BFC", "IC")) +
  theme_light()+
  geom_ribbon(data = wsCI_lp_mp,
              aes(ymin = value-ci,
                  ymax = value+ci,
                  fill = Group,
                  colour = Group),
              linetype=0,
              alpha = 0.3) +
  stat_summary(fun = mean,
               geom = "line",
               size = 0.8,
               aes(colour = Group),
               alpha = 0.8)+
  labs(x = "Time (ms)",
       y = expression(paste("Amplitude (",mu,"V)")),
       colour = "")+
  geom_vline(xintercept = 0,linetype = "dashed" )+
  geom_hline(yintercept = 0,linetype = "dashed")+
  facet_wrap(~Pooling+Type, ncol=5)
erp.plot.2
```
